#!/bin/python
# -*- coding: utf-8 -*-
# __author__ = sunsn
# __datetime__ = 2020/12/31 14:23
"""
Data retrieval API.
Merges multiple files into one module for ease of use.
"""
import io
import logging
import pathlib
import sys
import time
from functools import wraps
from typing import Optional

import pandas as pd
import requests

# Directory three levels above this file; treated as the base directory.
BASE_DIR = pathlib.Path(__file__).parent.parent.parent
# "data" sub-directory of the base directory.
DATA_DIR = BASE_DIR / "data"
# Prepend the base directory to the module search path (import-time side effect).
sys.path.insert(0, str(BASE_DIR))

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class MaxRetryError(Exception):
    """Signals that an operation kept failing until its retry budget ran out."""


def retry(max_tries: int = 3, interval: int = 5):
    """Build a decorator that retries a callable until it yields a result.

    An attempt counts as failed when the wrapped callable raises any
    ``Exception`` or returns ``None``.  Failed attempts are logged and,
    except after the final one, followed by a pause of ``interval`` seconds.

    Args:
        max_tries: Maximum number of attempts before giving up.
        interval: Seconds to wait between two consecutive attempts.

    Returns:
        A decorator; the decorated callable returns the first non-``None``
        result of the wrapped callable.

    Raises:
        MaxRetryError: Raised by the decorated callable once every attempt
            has failed.  The last exception seen, if any, is attached as the
            cause.
    """
    def decorator(func):
        # wraps(func) keeps the wrapped callable's name and docstring on the wrapper.
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_error = None
            for i in range(1, max_tries + 1):
                try:
                    res = func(*args, **kwargs)
                except Exception as e:
                    last_error = e
                    logger.error(f"获取数据异常，错误原因： {e}")
                else:
                    if res is not None:
                        return res

                logger.info(f"第{i}次，取数失败")
                # Pause between attempts only: sleeping after the last one
                # would merely delay the MaxRetryError.
                if i < max_tries:
                    time.sleep(interval)

            raise MaxRetryError(f"重试次数已达上限：{max_tries}次") from last_error

        return wrapper

    return decorator


class Settings:
    """Paths of the per-dataset CSV files, all located under ``DATA_DIR``."""

    SONAR_FILEPATH = DATA_DIR.joinpath("sonar.csv")
    ABALONE_FILEPATH = DATA_DIR.joinpath("abalone.csv")
    RED_WINE_QUALITY_FILEPATH = DATA_DIR.joinpath("red_wine_quality.csv")
    GLASS_FILEPATH = DATA_DIR.joinpath("glass.csv")


# Module-wide settings instance.
settings = Settings()


class DataApi(object):
    """Loaders for several public datasets, backed by a local CSV cache.

    Every ``get_*_data`` method first tries to read the cached CSV file named
    in ``settings``.  If that fails, the raw data is downloaded, parsed,
    written to the cache file and returned.  If the download or the parsing
    fails, an empty DataFrame carrying only the expected columns is returned.
    """

    # Dataset URLs.  All of them use HTTPS so that every download from this
    # host goes over the same encrypted scheme.
    _sonar_url = ("https://archive.ics.uci.edu/ml/machine-learning-"
                  "databases/undocumented/connectionist-bench/sonar/sonar.all-data")
    _abalone_url = ("https://archive.ics.uci.edu/ml/machine-"
                    "learning-databases/abalone/abalone.data")
    _red_wine_quality_url = ("https://archive.ics.uci.edu/ml/machine-"
                             "learning-databases/wine-quality/winequality-red.csv")
    _glass_url = ("https://archive.ics.uci.edu/ml/machine-"
                  "learning-databases/glass/glass.data")

    @classmethod
    @retry()
    def http_get(cls, url: str, timeout: float = 30) -> Optional[str]:
        """Fetch ``url`` and return the response body as text.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the caller indefinitely.

        Returns:
            The response text on HTTP 200, otherwise ``None`` (request error
            or any other status code).
        """
        try:
            resp = requests.get(
                url,
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=timeout,
            )
        except requests.RequestException as e:
            logger.error(f"请求数据失败，错位为： {e} \n 请求地址：{url}")
            return None

        if resp.status_code != 200:
            logger.warning(f"请求数据失败，状态码：{resp.status_code} \n 请求地址：{url}")
            return None
        return resp.text

    @classmethod
    def _load_dataset(cls, name, filepath, url, columns,
                      sep=",", skip_header=False) -> pd.DataFrame:
        """Load one dataset from the cache file, downloading it on a cache miss.

        Args:
            name: Dataset name used in log messages.
            filepath: Location of the cached CSV file.
            url: Download address used when the cache cannot be read.
            columns: Column names assigned to the downloaded data.
            sep: Field separator of the downloaded text.
            skip_header: Whether the first downloaded line is a header row
                that must be replaced by ``columns``.

        Returns:
            The dataset, or an empty DataFrame with ``columns`` when neither
            the cache nor the download yields usable data.
        """
        try:
            return pd.read_csv(filepath, header=0)
        except Exception as e:
            # Any failure to read the cache file is treated as a cache miss.
            logger.warning(f"获取{name}数据失败，错误原因：{e}")

        try:
            text = cls.http_get(url)
        except MaxRetryError:
            text = None
        if text is None:
            return pd.DataFrame(columns=columns)

        try:
            # read_csv skips blank lines (e.g. the one after a trailing
            # newline) and infers numeric dtypes, so a fresh download has the
            # same shape and types as a later read of the cache file.
            frame = pd.read_csv(
                io.StringIO(text),
                sep=sep,
                header=0 if skip_header else None,
                names=columns,
            )
        except ValueError as e:
            # pandas' parser and empty-data errors derive from ValueError.
            logger.error(f"解析{name}数据失败，错误原因：{e}")
            return pd.DataFrame(columns=columns)

        # The cache directory may not exist yet on a first run.
        pathlib.Path(filepath).parent.mkdir(parents=True, exist_ok=True)
        frame.to_csv(filepath, index=False)
        return frame

    @classmethod
    def get_sonar_data(cls) -> pd.DataFrame:
        """Return the sonar dataset with 61 columns named ``V0`` to ``V60``."""
        cols = [f"V{i}" for i in range(60 + 1)]
        return cls._load_dataset(
            "sonar", settings.SONAR_FILEPATH, cls._sonar_url, cols)

    @classmethod
    def get_abalone_data(cls, to_cn_cols=False) -> pd.DataFrame:
        """Return the abalone dataset.

        Args:
            to_cn_cols: When true, rename the columns to their Chinese names.
        """
        cols_mapping = {
            'Sex': '性别',
            'Length': '长度',
            'Diameter': '直径',
            'Height': '高度',
            'Whole weight': '整体重量',
            'Shucked weight': '去壳后重量',
            'Viscera weight': '脏器重量',
            'Shell weight': '壳的重量',
            'Rings': '环数'  # target variable
        }
        df_abalone = cls._load_dataset(
            "abalone", settings.ABALONE_FILEPATH, cls._abalone_url,
            list(cols_mapping))

        if to_cn_cols:
            df_abalone = df_abalone.rename(columns=cols_mapping)

        return df_abalone

    @classmethod
    def get_red_wine_quality_data(cls, to_cn_cols=False) -> pd.DataFrame:
        """Return the red wine quality dataset.

        The downloaded file is semicolon-separated and starts with a header
        row, which is replaced by the column names defined here.

        Args:
            to_cn_cols: When true, rename the columns to their Chinese names.
        """
        cols_mapping = {
            'fixed acidity': '非挥发性酸',
            'volatile acidity': '挥发性酸',
            'citric acid': '柠檬酸',
            'residual sugar': '残留糖分',
            'chlorides': '氯化物',
            'free sulfur dioxide': '游离二氧化硫',
            'total sulfur dioxide': '总二氧化硫',
            'density': '密度',
            'pH': 'PH值',
            # NOTE: Baidu Translate renders "sulphates" as 硫酸盐 instead.
            'sulphates': '亚硝酸盐',
            'alcohol': '酒精含量',
            'quality': '口感评分'
        }
        df_wine = cls._load_dataset(
            "red_wine_quality", settings.RED_WINE_QUALITY_FILEPATH,
            cls._red_wine_quality_url, list(cols_mapping),
            sep=";", skip_header=True)

        if to_cn_cols:
            df_wine = df_wine.rename(columns=cols_mapping)

        return df_wine

    @classmethod
    def get_glass_data(cls, to_cn_cols=False) -> pd.DataFrame:
        """Return the glass dataset.

        Args:
            to_cn_cols: When true, rename the columns to their Chinese names.
        """
        cols_mapping = {
            'ID': 'ID',
            'RI': '折射率',
            'Na': '钠',
            'Mg': '镁',
            'Al': '铝',
            'Si': '硅',
            'K': '钾',
            'Ca': '钙',
            'Ba': '钡',
            'Fe': '铁',
            'Type': '类型',
        }
        df_glass = cls._load_dataset(
            "glass", settings.GLASS_FILEPATH, cls._glass_url,
            list(cols_mapping))

        if to_cn_cols:
            df_glass = df_glass.rename(columns=cols_mapping)

        return df_glass


if __name__ == '__main__':
    # Smoke run: load each dataset in turn and show its first five rows.
    demo_loaders = (
        ("sonar: \n", DataApi.get_sonar_data),
        ("abalone: \n", DataApi.get_abalone_data),
        ("red_wine_quality: \n", DataApi.get_red_wine_quality_data),
        ("glass: \n", DataApi.get_glass_data),
    )
    for caption, load in demo_loaders:
        df = load()
        print(caption, df.head(5))
